{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 加载所需的包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn_relief import RReliefF\n",
    "from pandas import read_csv\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import geatpy as ea\n",
    "import sklearn\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import GridSearchCV "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Q2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Filter阶段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义MinMax标准化函数\n",
    "def normalization(data):\n",
    "    _range = np.max(data) - np.min(data)\n",
    "    return (data - np.min(data)) / _range\n",
    "\n",
    "#读取数据\n",
    "dataset = read_csv('Data-368.csv', encoding='utf-8')\n",
    "normlized_data = normalization(dataset)\n",
    "input_matrix = np.asmatrix(normlized_data.iloc[:,1:])\n",
    "label_vector = normlized_data.iloc[:,0]\n",
    "\n",
    "#RReliefF算法处理\n",
    "R = RReliefF()\n",
    "feature_weights = R.fit(input_matrix, label_vector)\n",
    "\n",
    "#权重的标准化\n",
    "normlized_feature_weights = normalization(feature_weights.w_)\n",
    "normlized_feature_weights = normlized_feature_weights / np.sum(normlized_feature_weights)\n",
    "\n",
    "#处理结果导出\n",
    "weights_df = pd.DataFrame(normlized_feature_weights)\n",
    "writer = pd.ExcelWriter('权重及归一化数据.xlsx')\n",
    "weights_df.to_excel(writer,\"权重\")\n",
    "normlized_data.to_excel(writer,\"归一化数据\")\n",
    "writer.save()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Wrapper阶段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#读取数据\n",
    "Normalized_Data_After_Filter = read_csv('Data-after-Filter.csv')\n",
    "Y = Normalized_Data_After_Filter.iloc[:, 1]\n",
    "X = Normalized_Data_After_Filter.iloc[:, 2:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### MyProblem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MyProblem(ea.Problem): # 继承Problem父类\n",
    "    def __init__(self):\n",
    "        name = 'MyProblem' # 初始化name（函数名称，可以随意设置）\n",
    "        M = 1 # 初始化M（目标维数）\n",
    "        maxormins = [-1] # 初始化maxormins（目标最小最大化标记列表，1：最小化该目标；-1：最大化该目标）\n",
    "        Dim = 211 # 初始化Dim（决策变量维数）\n",
    "        varTypes = [1] * Dim # 初始化varTypes（决策变量的类型，元素为0表示对应的变量是连续的；1表示是离散的）\n",
    "        lb = [0] * Dim # 决策变量下界\n",
    "        ub = [1] * Dim # 决策变量上界\n",
    "        lbin = [1] * Dim # 决策变量下边界（0表示不包含该变量的下边界，1表示包含）\n",
    "        ubin = [1] * Dim # 决策变量上边界（0表示不包含该变量的上边界，1表示包含）\n",
    "        # 调用父类构造方法完成实例化\n",
    "        ea.Problem.__init__(self, name, M, maxormins, Dim, varTypes, lb, ub, lbin, ubin)\n",
    "        \n",
    "        \n",
    "    def aimFunc(self, pop): # 目标函数\n",
    "        x = pop.Phen # 得到决策变量矩阵\n",
    "        NIND = x.shape[0]\n",
    "        alpha = 0.5\n",
    "        #SVR\n",
    "        \"\"\"grid = GridSearchCV(SVR(), param_grid={\"C\":[100], \"gamma\":['scale']}, cv=10)  \n",
    "        grid.fit(X, Y) \n",
    "        best_params_0 = grid.best_params_\"\"\"\n",
    "        clf = SVR(kernel='rbf', C=100, gamma='scale')\n",
    "        mse_0 = -cross_val_score(clf, X, Y, cv=10, scoring='neg_mean_squared_error')\n",
    "        m_0 = np.mean(mse_0)\n",
    "        f_0 = X.shape[1] \n",
    "        \n",
    "\n",
    "\n",
    "        ObjV = []\n",
    "        for i in range(NIND):\n",
    "            feature_index = np.where(x[[i],]==1)[1]\n",
    "            feature_selection = X.iloc[:, feature_index]\n",
    "            \"\"\"grid = GridSearchCV(SVR(), param_grid={\"C\":[1, 10, 100, 1000, 5000], \"gamma\":['auto', 1e-4, 1e-3, 0.01, 0.1, 1]}, cv=10)  \n",
    "            grid.fit(feature_selection, Y) \n",
    "            best_params = grid.best_params_\"\"\"\n",
    "            clf = SVR(kernel='rbf', C=100, gamma='scale')\n",
    "            mse = -cross_val_score(clf, feature_selection, Y, cv=10, scoring='neg_mean_squared_error')\n",
    "            m_s = np.mean(mse)\n",
    "            f_s = np.sum(x[i])\n",
    "            obj_func = alpha*(f_0 - f_s) / f_0 + (m_0 - m_s) / m_0\n",
    "            ObjV.append(obj_func)\n",
    "        pop.ObjV = np.array([ObjV]).T # 计算目标函数值，赋值给pop种群对象的ObjV属性 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Main"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "import numpy as np\n",
    "import geatpy as ea # import geatpy\n",
    "#from MyProblem import MyProblem # 导入自定义问题接口\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    \"\"\"===============================实例化问题对象===========================\"\"\"\n",
    "    problem = MyProblem() # 生成问题对象\n",
    "    \"\"\"=================================种群设置===============================\"\"\"\n",
    "    Encoding = 'BG'       # 编码方式\n",
    "    NIND = 200             # 种群规模\n",
    "    Field = ea.crtfld(Encoding, problem.varTypes, problem.ranges, problem.borders) # 创建区域描述器\n",
    "    population = ea.Population(Encoding, Field, NIND) # 实例化种群对象（此时种群还没被初始化，仅仅是完成种群对象的实例化）\n",
    "    \"\"\"===============================算法参数设置=============================\"\"\"\n",
    "    myAlgorithm = ea.soea_SEGA_templet(problem, population) # 实例化一个算法模板对象\n",
    "    myAlgorithm.MAXGEN = 100 # 最大进化代数\n",
    "    \"\"\"==========================调用算法模板进行种群进化=======================\"\"\"\n",
    "    [population, obj_trace, var_trace] = myAlgorithm.run() # 执行算法模板\n",
    "    population.save() # 把最后一代种群的信息保存到文件中\n",
    "    # 输出结果\n",
    "    best_gen = np.argmin(problem.maxormins * obj_trace[:, 1]) # 记录最优种群个体是在哪一代\n",
    "    best_ObjV = obj_trace[best_gen, 1]\n",
    "    print('最优的目标函数值为：%s'%(best_ObjV))\n",
    "    print('最优的控制变量值为：')\n",
    "    for i in range(var_trace.shape[1]):\n",
    "        print(var_trace[best_gen, i])\n",
    "    print('有效进化代数：%s'%(obj_trace.shape[0]))\n",
    "    print('最优的一代是第 %s 代'%(best_gen + 1))\n",
    "    print('评价次数：%s'%(myAlgorithm.evalsNum))\n",
    "    print('时间已过 %s 秒'%(myAlgorithm.passTime))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Q3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Normalized_Data_After_Filter_Wrapper = read_csv('Data-after-Filter-Wrapper.csv',encoding='latin-1')\n",
    "Y = Normalized_Data_After_Filter_Wrapper.iloc[:, 0]\n",
    "X = Normalized_Data_After_Filter_Wrapper.iloc[:, 1:]\n",
    "\n",
    "#十折交叉验证加网格搜索调参\n",
    "grid = GridSearchCV(SVR(), param_grid={\"C\":[1, 5, 10, 50, 100, 500, 1000], \"gamma\":['scale', 0.1, 0.01, 1e-03, 1e-04]}, cv=10)  \n",
    "grid.fit(X, Y) \n",
    "best_params = grid.best_params_\n",
    "\n",
    "clf = SVR(kernel='rbf', **best_params)\n",
    "mse = -cross_val_score(clf, X, Y, cv=10, scoring='neg_mean_squared_error')\n",
    "m_ave = np.mean(mse)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Q4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 建立产品S含量的预测模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "S_Modeling_Data = read_csv('S-Modelin-Data.csv')\n",
    "S_Y = S_Modeling_Data.iloc[:, 0]\n",
    "S_X = S_Modeling_Data.iloc[:, 3:]\n",
    "\n",
    "S_Modeling_grid = GridSearchCV(SVR(), param_grid={\"C\":[1, 5, 10, 50, 100, 500, 1000], \"gamma\":['scale', 1, 0.1, 0.01, 1e-03, 1e-04]}, cv=10)  \n",
    "S_Modeling_grid.fit(S_X, S_Y) \n",
    "S_best_params = S_Modeling_grid.best_params_\n",
    "\n",
    "S_clf = SVR(kernel='rbf', **S_best_params)\n",
    "S_mse = -cross_val_score(S_clf, S_X, S_Y, cv=10, scoring='neg_mean_squared_error')\n",
    "S_m_ave = np.mean(S_mse)\n",
    "\n",
    "S_Model = S_clf.fit(S_X, S_Y)\n",
    "S_Model.predict(np.array(S_X.iloc[0,:]).reshape(1, -1))[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Normalized_Data_After_Filter_Wrapper = read_csv('Data-after-Filter-Wrapper.csv',encoding='latin-1')\n",
    "Y = Normalized_Data_After_Filter_Wrapper.iloc[:, 0]\n",
    "X = Normalized_Data_After_Filter_Wrapper.iloc[:, 1:]\n",
    "\"\"\"index = np.where(np.array(Y) != 0)[0]#Y取最小值时候归一化后为0\n",
    "Y = Y[index]\n",
    "X = X.iloc[index,:]\"\"\"\n",
    "append_data = X.iloc[:, :2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optimization_My_Problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Optimization_MyProblem(ea.Problem): # 继承Problem父类\n",
    "    def __init__(self):\n",
    "        name = 'Optimization_MyProblem' # 初始化name（函数名称，可以随意设置）\n",
    "        M = 1 # 初始化M（目标维数）\n",
    "        maxormins = [-1] # 初始化maxormins（目标最小最大化标记列表，1：最小化该目标；-1：最大化该目标）\n",
    "        Dim = 19 # 初始化Dim（决策变量维数）\n",
    "        varTypes = [0] * Dim # 初始化varTypes（决策变量的类型，元素为0表示对应的变量是连续的；1表示是离散的）\n",
    "        lb = [0] * Dim # 决策变量下界\n",
    "        ub = [1] * Dim # 决策变量上界\n",
    "        lbin = [1] * Dim # 决策变量下边界（0表示不包含该变量的下边界，1表示包含）\n",
    "        ubin = [1] * Dim # 决策变量上边界（0表示不包含该变量的上边界，1表示包含）\n",
    "        # 调用父类构造方法完成实例化\n",
    "        ea.Problem.__init__(self, name, M, maxormins, Dim, varTypes, lb, ub, lbin, ubin)\n",
    "        \n",
    "        \n",
    "    def aimFunc(self, pop): # 目标函数\n",
    "        Vars = pop.Phen # 得到决策变量矩阵\n",
    "        NIND = Vars.shape[0]\n",
    "        clf = SVR(kernel='rbf', **best_params)\n",
    "        forecast_model = clf.fit(X, Y)\n",
    "        S_clf = SVR(kernel='rbf', **S_best_params)\n",
    "        S_Model = S_clf.fit(S_X, S_Y)\n",
    "        \n",
    "        ObjV = []\n",
    "        for i in range(NIND):\n",
    "            train_Vars = np.hstack((append_data_order, Vars[i]))\n",
    "            loss_predict = forecast_model.predict(np.array(train_Vars).reshape(1, -1))[0]    \n",
    "            reversed_loss_predict = loss_predict * 1.62 + 0.2 #需要调整回原来的数据维度\n",
    "            reversed_loss = (train_Vars[0] * 6.4 + 85.3) - (train_Vars[1] * 5.32 + 85.1)\n",
    "            obj_func = 1 - reversed_loss_predict / reversed_loss\n",
    "            ObjV.append(obj_func)\n",
    "        pop.ObjV = np.array([ObjV]).T # 计算目标函数值，赋值给pop种群对象的ObjV属性   \n",
    "        # 采用可行性法则处理约束\n",
    "        pop.CV = S_Model.predict(Vars).reshape(-1, 1)- (5-3.2)/8.6\n",
    "    def calReferObjV(self): # 设定目标数参考值0.3\n",
    "        referenceObjV = np.array([[0.3]])\n",
    "        return referenceObjV\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optimization_Main"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    \"\"\"================================实例化问题对象===========================\"\"\"\n",
    "    for sample_num in range(len(append_data)):\n",
    "        try:\n",
    "            print(\"样本\" + str(sample_num))\n",
    "            append_data_order = np.array(append_data.iloc[sample_num,:])\n",
    "            problem = Optimization_MyProblem() # 生成问题对象\n",
    "            \"\"\"==================================种群设置===============================\"\"\"\n",
    "            Encoding = 'RI'       # 编码方式\n",
    "            NIND = 1000            # 种群规模\n",
    "            Field = ea.crtfld(Encoding, problem.varTypes, problem.ranges, problem.borders) # 创建区域描述器\n",
    "            population = ea.Population(Encoding, Field, NIND) # 实例化种群对象（此时种群还没被初始化，仅仅是完成种群对象的实例化）\n",
    "            \"\"\"================================算法参数设置=============================\"\"\"\n",
    "            myAlgorithm = ea.soea_DE_rand_1_bin_templet(problem, population) # 实例化一个算法模板对象\n",
    "            myAlgorithm.MAXGEN = 500 # 最大进化代数\n",
    "            myAlgorithm.mutOper.F = 0.5 # 差分进化中的参数F\n",
    "            myAlgorithm.recOper.XOVR = 0.7 # 重组概率\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "            \"\"\"===========================调用算法模板进行种群进化=======================\"\"\"\n",
    "            [population, obj_trace, var_trace] = myAlgorithm.run() # 执行算法模板\n",
    "            population.save() # 把最后一代种群的信息保存到文件中\n",
    "            # 输出结果\n",
    "            best_gen = np.argmin(problem.maxormins * obj_trace[:, 1]) # 记录最优种群个体是在哪一代\n",
    "            best_ObjV = obj_trace[best_gen, 1]\n",
    "            print('最优的目标函数值为：%s'%(best_ObjV))\n",
    "            print('最优的决策变量值为：')\n",
    "            for i in range(var_trace.shape[1]):\n",
    "                print(var_trace[best_gen, i])\n",
    "            print('有效进化代数：%s'%(obj_trace.shape[0]))\n",
    "            print('最优的一代是第 %s 代'%(best_gen + 1))\n",
    "            print('评价次数：%s'%(myAlgorithm.evalsNum))\n",
    "            print('时间已过 %s 秒'%(myAlgorithm.passTime))\n",
    "            print(\"\")\n",
    "        except RuntimeError:\n",
    "            print(\"样本\" + str(sample_num) + \"没找到可行解。\")\n",
    "            print(\"\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
